Experiment & contact info
PIs: Rosalie Sears (searsr@ohsu.edu), Laura Heiser (heiserl@ohsu.edu)
Sample preparation: Zinab Doha (dohaz@ohsu.edu)
Library prep from single cells: Xi Li & Colin Daniel (danielc@ohsu.edu)
Analysis: Nick Calistri (calistri@ohsu.edu)
Sequencing performed by OHSU MPSSR
Analysis design
load rliger integrated data (s3_celltypes.rds)
separate each lineage with multiple clusters (epithelial, fibroblast, lymphoid, myeloid)
perform DE within each lineage
Convert cluster to celltype and annotate with relevant pathway/expression
Save integrated seurat obejct (s4_so_merge.rds) as well as the lineage subset seurat objects
Object names
epi = epithelial sni = stromal non-immune lym = lymphoid mye = myeloid
Set up
Load libraries
library(Matrix)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.1.2
## Registered S3 method overwritten by 'spatstat.geom':
## method from
## print.boxx cli
## Attaching SeuratObject
library(clusterProfiler)
##
## clusterProfiler v4.0.5 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141. doi: 10.1016/j.xinn.2021.100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
##
## simplify
## The following object is masked from 'package:stats':
##
## filter
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.2
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:Matrix':
##
## expand, unname
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:grDevices':
##
## windows
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
Set seed
set.seed(4)
load s3_celltypes.rds
so_merge <- readRDS('analysis_files/s3_celltypes.rds')
so_merge@meta.data$barcode <- rownames(so_merge@meta.data)
# Ensure scale.data computed for all genes
if(nrow(so_merge@assays$RNA@scale.data) < nrow(so_merge@assays$RNA@counts)){
print('Scaling all features')
so_merge <- ScaleData(so_merge,
features = rownames(so_merge))
}else if(nrow(so_merge@assays$RNA@scale.data) == nrow(so_merge@assays$RNA@counts)){
print('ScaleData already computed for all features')
}
## [1] "ScaleData already computed for all features"
Create subset seurat objects
so_epi <- subset(so_merge, subset = lineage == 'epithelial')
so_lym <- subset(so_merge, subset = lineage == 'lymphoid')
so_mye <- subset(so_merge, subset = lineage == 'myeloid')
so_fibro <- subset(so_merge, subset = lineage == 'fibroblast')
Prepare DEG function
subset_ego <- function(curr_sub, curr_markers){
if(nrow(curr_markers) > 1){
curr_top <- curr_markers %>%
filter(avg_log2FC > 0.5) %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC)) %>%
filter(p_val_adj <= 0.05) %>%
slice_head(n = 5)
DoHeatmap(curr_sub, features = curr_top$gene)
for(i in unique(curr_markers$cluster)){
clust_markers <- curr_markers %>%
filter(cluster == i)
volcano_plot <- ggplot(clust_markers, aes(x = avg_log2FC, y = -log10(p_val_adj), label = gene))+
geom_point()+
ggrepel::geom_text_repel()+
ggtitle(paste0('Cluster: ', i))
print(volcano_plot)
print(paste0('Evaluating cluster: ', i))
curr_top <- curr_markers %>%
filter(cluster == i) %>%
filter(avg_log2FC >= 0.5) %>%
filter(p_val_adj <= 0.05)
curr_bottom <- curr_markers %>%
filter(cluster == i) %>%
filter(avg_log2FC <= -0.5) %>%
filter(p_val_adj <= 0.05)
ego_top <- enrichGO(gene = curr_top$gene,
OrgDb = org.Mm.eg.db,
keyType = 'SYMBOL',
ont = "ALL",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05)
if(if(!is.null(ego_top)){nrow(ego_top) > 0 & nrow(curr_top) >= 3}){
ego_top_genelist <- curr_top$avg_log2FC
names(ego_top_genelist) <- curr_top$gene
ego_top_genelist <- sort(ego_top_genelist, decreasing = TRUE)
p1 <- dotplot(ego_top, title = paste0('Cluster ', i))
p2 <- cnetplot(ego_top,
showCategory = 10,
foldChange = ego_top_genelist,
colorEdge = TRUE)
p3 <- heatplot(ego_top,
showCategory = 10,
foldChange = ego_top_genelist)
print(p1)
print(p2)
print(p3)
}else(
print('No upregulated GOs found')
)
}
}else{
print('FindAllMarkers did not return DEGs (only one cluster in celltype?)')
}
}
Epithelial subset analysis (epi)
UMAP and DE within subset
so_epi <- RunUMAP(so_epi, reduction = 'iNMF', dims = 1:50)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:55:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:55:32 Read 2824 rows and found 50 numeric columns
## 20:55:32 Using Annoy for neighbor search, n_neighbors = 30
## 20:55:32 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:55:32 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file39143f522e53
## 20:55:32 Searching Annoy index using 1 thread, search_k = 3000
## 20:55:33 Annoy recall = 100%
## 20:55:33 Commencing smooth kNN distance calibration using 1 thread
## 20:55:34 Initializing from normalized Laplacian + noise
## 20:55:34 Commencing optimization for 500 epochs, with 121422 positive edges
## 20:55:40 Optimization finished
DimPlot(so_epi, label = TRUE)+
coord_equal()
epi_markers <- FindAllMarkers(so_epi)
## Calculating cluster 2
## Calculating cluster 8
## Calculating cluster 10
## Calculating cluster 14.3
## Calculating cluster 19
write_csv(epi_markers, file = 'analysis_files/s4_epithelial_markers.csv')
epi_top <- epi_markers %>%
group_by(cluster) %>%
filter(p_val_adj <= 0.01) %>%
arrange(desc(avg_log2FC)) %>%
slice_head(n = 10)
# Heatmap of top markers
DoHeatmap(so_epi, features = epi_top$gene)
# Dendrogram of clusters
so_epi <- BuildClusterTree(so_epi)
PlotClusterTree(so_epi)
# Distribution of cells in each histology
ggplot(so_epi@meta.data, aes(x = cluster_l2, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~histology)
ggplot(so_epi@meta.data, aes(x = cluster_l2, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~cellfrac_type)
Cytokeratin profile of epithelial clusters
keratin_features <- grep(row.names(so_epi), pattern = '^Krt', value = TRUE)
keratin_aves <- AverageExpression(so_epi,
features = keratin_features,
assay = 'RNA')
keratin_aves_filtered <- keratin_aves$RNA[rowSums(keratin_aves$RNA) != 0,]
keratin_aves_top <- keratin_aves$RNA[rowSums(keratin_aves$RNA) >= quantile(rowSums(keratin_aves$RNA), 0.9), ]
pheatmap::pheatmap(keratin_aves_filtered)
pheatmap::pheatmap(keratin_aves_top)
DotPlot(so_epi,
features = row.names(keratin_aves_top),
cols = 'RdYlBu')+
coord_flip()
EGO on DEGs
subset_ego(curr_sub = so_epi,
curr_markers = epi_markers)
## Warning: ggrepel: 1025 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 2"
## Warning: ggrepel: 905 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 8"
## Warning: ggrepel: 1470 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 10"
## Warning: ggrepel: 400 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 14.3"
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1141 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 19"
CellMarkerDB markers
# Basal markers:
VlnPlot(so_epi, features = c('Acta2', 'Krt14', 'Krt5', 'Mylk', 'Vim'))
# Luminal Markers
VlnPlot(so_epi, features = c('Krt18', 'Krt19', 'Flot2', 'Cdh1'))
epi Celltype_l2
c2 - epi1_luminal_oxphos c8 - epi2_luminal c10 - epi3_basal c14.3 - epi4_proliferating c19 - epi5_luminal_ros-response
epi_dict <- tibble(cluster = sort(unique(so_epi@meta.data$cluster_l2)),
celltype = c('c2-luminal_oxphos',
'c8-luminal_gland_development',
'c10-basal_ecm_modulating',
'c14.3-proliferating',
'c19-luminal_ros_response'))
so_epi@meta.data$celltype_full <- plyr::mapvalues(x = so_epi@meta.data$cluster_l2,
from = epi_dict$cluster,
to = epi_dict$celltype)
DimPlot(so_epi,
group.by = 'celltype_full',
label = TRUE)+
coord_equal()
# pheatmap of celltype
epi_freq_table <- so_epi@meta.data %>%
dplyr::select(celltype_full, cellfrac_type) %>%
table()
epi_freq_table <- epi_freq_table[rowSums(epi_freq_table) != 0,]
pheatmap::pheatmap(prop.table(epi_freq_table, margin = 2),
display_numbers = epi_freq_table)
Fibroblast subset analysis (fibro)
UMAP and DE within subset
so_fibro <- RunUMAP(so_fibro, reduction = 'iNMF', dims = 1:50,
seed.use = 500)
## 20:59:22 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:59:22 Read 1533 rows and found 50 numeric columns
## 20:59:22 Using Annoy for neighbor search, n_neighbors = 30
## 20:59:22 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:59:22 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file391437ae1f97
## 20:59:22 Searching Annoy index using 1 thread, search_k = 3000
## 20:59:23 Annoy recall = 100%
## 20:59:23 Commencing smooth kNN distance calibration using 1 thread
## 20:59:24 Initializing from normalized Laplacian + noise
## 20:59:24 Commencing optimization for 500 epochs, with 61640 positive edges
## 20:59:28 Optimization finished
DimPlot(so_fibro, label = TRUE)+
coord_equal()
fibro_markers <- FindAllMarkers(so_fibro)
## Calculating cluster 7
## Calculating cluster 12
## Calculating cluster 17
write_csv(fibro_markers, file = 'analysis_files/s4_fibro_markers.csv')
fibro_top <- fibro_markers %>%
group_by(cluster) %>%
filter(p_val_adj <= 0.01) %>%
arrange(desc(avg_log2FC)) %>%
slice_head(n = 10)
# Heatmap of top markers
DoHeatmap(so_fibro, features = fibro_top$gene)
# Dendrogram of clusters
so_fibro <- BuildClusterTree(so_fibro)
PlotClusterTree(so_fibro)
# Distribution of cells in each histology
ggplot(so_fibro@meta.data, aes(x = seurat_clusters, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~histology)
ggplot(so_fibro@meta.data, aes(x = seurat_clusters, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~cellfrac_type)
EGO on DEGs
subset_ego(curr_sub = so_fibro,
curr_markers = fibro_markers)
## Warning: ggrepel: 658 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 7"
## Warning: ggrepel: 1144 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 12"
## Warning: ggrepel: 954 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 17"
c7 - fibro1_ctla2a_high c12 - fibro2_col11_high c17 - fibro3_anti-motility
FeaturePlot(so_fibro,
features = c('Mme', 'Ctla2a', 'Col11a1', 'Acta2'),
label = TRUE)
fibro_dict <- tibble(cluster = sort(unique(so_fibro@meta.data$cluster_l2)),
celltype = c('c7-ctla2a_high',
'c12-col11_high',
'c17-anti_motility'))
so_fibro@meta.data$celltype_full<- plyr::mapvalues(x = so_fibro@meta.data$cluster_l2,
from = fibro_dict$cluster,
to = fibro_dict$celltype)
DimPlot(so_fibro,
group.by = 'celltype_full',
label = TRUE)+
coord_equal()
# pheatmap of celltype_l2
fibro_freq_table <- so_fibro@meta.data %>%
dplyr::select(celltype_full, cellfrac_type) %>%
table()
fibro_freq_table <- fibro_freq_table[rowSums(fibro_freq_table) != 0,]
pheatmap::pheatmap(prop.table(fibro_freq_table, margin = 2),
display_numbers = fibro_freq_table)
Lymphoid subset analysis (lym)
UMAP and DE within subset
so_lym <- RunUMAP(so_lym, reduction = 'iNMF', dims = 1:50)
## 21:00:58 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:00:58 Read 3414 rows and found 50 numeric columns
## 21:00:58 Using Annoy for neighbor search, n_neighbors = 30
## 21:00:58 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:00:59 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file3914186808
## 21:00:59 Searching Annoy index using 1 thread, search_k = 3000
## 21:00:59 Annoy recall = 100%
## 21:01:00 Commencing smooth kNN distance calibration using 1 thread
## 21:01:01 Initializing from normalized Laplacian + noise
## 21:01:01 Commencing optimization for 500 epochs, with 163904 positive edges
## 21:01:09 Optimization finished
DimPlot(so_lym, label = TRUE)+
coord_equal()
lym_markers <- FindAllMarkers(so_lym)
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 9
## Calculating cluster 13
## Calculating cluster 14.1
## Calculating cluster 21
## Calculating cluster 22
write_csv(lym_markers, file = 'analysis_files/s4_lymphoid_markers.csv')
lym_top <- lym_markers %>%
group_by(cluster) %>%
filter(p_val_adj <= 0.01) %>%
arrange(desc(avg_log2FC)) %>%
slice_head(n = 10)
# Heatmap of top markers
DoHeatmap(so_lym, features = lym_top$gene)
# Dendrogram of clusters
so_lym <- BuildClusterTree(so_lym)
PlotClusterTree(so_lym)
# Distribution of cells in each histology
ggplot(so_lym@meta.data, aes(x = cluster_l2, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~histology)
ggplot(so_lym@meta.data, aes(x = cluster_l2, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~cellfrac_type)
FeaturePlots
lym_moi <- c('Cd3e', 'Cd4', 'Trdc', 'Tcrg-C1', 'Cd8a', 'Foxp3', 'Mki67', 'Cd19', 'Cd79b', 'Ms4a1', 'Gzma')
DotPlot(so_lym,
features = lym_moi,
cols = 'RdYlBu')+
coord_flip()
EGO on DEGs
subset_ego(curr_sub = so_lym,
curr_markers = lym_markers)
## Warning: ggrepel: 235 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 5"
## Warning: ggrepel: 692 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 6"
## Warning: ggrepel: 547 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 9"
## Warning: ggrepel: 630 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 13"
## Warning: ggrepel: 1072 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 14.1"
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1119 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 21"
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 853 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 22"
c5 - Cd4 T cell c6 - Gamma Delta T cells c9 - CD8 T cell c13 - Treg c14.1 - Proliferating lymphoid c21 - B cells (Ms4a1{Cd20}) c22 - NK (Gzma)
lym_dict <- tibble(cluster = sort(unique(so_lym@meta.data$cluster_l2)),
celltype = c('c5-Cd4_T',
'c6-GammaDelta_T',
'c9-Cd8_T',
'c13-Treg',
'c14.1-proliferating',
'c21-B',
'c22-NK'))
so_lym@meta.data$celltype_full <- plyr::mapvalues(x = so_lym@meta.data$cluster_l2,
from = lym_dict$cluster,
to = lym_dict$celltype)
DimPlot(so_lym,
group.by = 'celltype_full',
label = TRUE)+
coord_equal()
# pheatmap of celltype_full
lym_freq_table <- so_lym@meta.data %>%
dplyr::select(celltype_full, cellfrac_type) %>%
table()
lym_freq_table <- lym_freq_table[rowSums(lym_freq_table) != 0,]
pheatmap::pheatmap(prop.table(lym_freq_table, margin = 2),
display_numbers = lym_freq_table)
DotPlot(so_lym,
features = lym_moi,
cols = 'RdYlBu',
group.by = 'celltype_full')+
theme_bw()+
coord_flip()+
RotatedAxis()
myeloid subset analysis (imm)
UMAP and DE within subset
so_mye <- RunUMAP(so_mye, reduction = 'iNMF', dims = 1:50)
## 21:05:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:05:23 Read 5706 rows and found 50 numeric columns
## 21:05:23 Using Annoy for neighbor search, n_neighbors = 30
## 21:05:23 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:05:24 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file3914464e3059
## 21:05:24 Searching Annoy index using 1 thread, search_k = 3000
## 21:05:25 Annoy recall = 100%
## 21:05:26 Commencing smooth kNN distance calibration using 1 thread
## 21:05:27 Initializing from normalized Laplacian + noise
## 21:05:27 Commencing optimization for 500 epochs, with 269200 positive edges
## 21:05:40 Optimization finished
DimPlot(so_mye, label = TRUE)+
coord_equal()
mye_markers <- FindAllMarkers(so_mye)
## Calculating cluster 1
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 14.2
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 18
## Calculating cluster 20
## Calculating cluster 24
write_csv(mye_markers, file = 'analysis_files/s4_myeloid_markers.csv')
mye_top <- mye_markers %>%
group_by(cluster) %>%
filter(p_val_adj <= 0.01) %>%
arrange(desc(avg_log2FC)) %>%
slice_head(n = 10)
# Heatmap of top markers
DoHeatmap(so_mye, features = mye_top$gene)
# Dendrogram of clusters
so_mye <- BuildClusterTree(so_mye)
PlotClusterTree(so_mye)
# Distribution of cells in each histology
ggplot(so_mye@meta.data, aes(x = cluster_l2, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~histology)
ggplot(so_mye@meta.data, aes(x = cluster_l2, fill = histology))+
geom_bar()+
theme_bw()+
facet_wrap(~cellfrac_type)
EGO on DEGs
subset_ego(curr_sub = so_mye,
curr_markers = mye_markers)
## Warning: ggrepel: 1738 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 1"
## Warning: ggrepel: 1919 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 3"
## Warning: ggrepel: 1757 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 4"
## Warning: ggrepel: 1554 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 14.2"
## Warning: ggrepel: 1170 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 15"
## Warning: ggrepel: 1000 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 16"
## Warning: ggrepel: 321 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 18"
## Warning: ggrepel: 1367 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 20"
## Warning: ggrepel: 1375 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "Evaluating cluster: 24"
c1 - Macrophage c3 - Neutrophil Lrg high (S100a8/9 high, low nFeature) c4 - Neutrophil Ccl{3,4} high (S100a8/9 high, low nFeature) c14.2 - Proliferating c15 - cDC2 (Cd209a) c16 - Monocyte (Plac8, Ly6c2) c18 - IFN response c20 - cDC1 (Clec9a, Cadm1) c24 - DC
mye_moi <- c('Cd14', 'Cd68', 'Cd74', 'Itgam', 'Itgax', 'Mki67', 'S100a8', 'S100a9', 'Ccl3', 'Ccl4', 'Cd209a', 'Plac8', 'Ly6c2', 'Isg15', 'Clec9a', 'Cadm1')
DotPlot(so_mye,
features = mye_moi,
cols = 'RdYlBu')+
theme_bw()+
coord_flip()
mye_dict <- tibble(cluster = sort(unique(so_mye@meta.data$cluster_l2)),
celltype = c('c1-Macrophage',
'c3-Neutrophil_Lrg_high',
'c4-Neutrophil_CCL3_high',
'c14.2-proliferating',
'c15-cDC2',
'c16-Monocyte',
'c18-IFN_response',
'c20-cDC1',
'c24-DC'))
so_mye@meta.data$celltype_full <- plyr::mapvalues(x = so_mye@meta.data$cluster_l2,
from = mye_dict$cluster,
to = mye_dict$celltype)
DimPlot(so_mye,
group.by = 'celltype_full',
label = TRUE)+
coord_equal()
# pheatmap of celltype_l2
mye_freq_table <- so_mye@meta.data %>%
dplyr::select(celltype_full, cellfrac_type) %>%
table()
mye_freq_table <- mye_freq_table[rowSums(mye_freq_table) != 0,]
pheatmap::pheatmap(prop.table(mye_freq_table, margin = 2),
display_numbers = mye_freq_table)
DotPlot(so_mye,
features = mye_moi,
cols = 'RdYlBu',
group.by = 'celltype_full')+
theme_bw()+
coord_flip()+
RotatedAxis()
ggplot(so_mye@meta.data, aes(x = celltype_full, fill = tumor))+
geom_bar()+
theme_bw()+
RotatedAxis()
save output
Add celltype_l2 to so_merge
celltype_full_dict <- rbind(epi_dict,
fibro_dict,
lym_dict,
mye_dict,
c(11, 'c11-endothelial'),
c(23, 'c23-perivascular'))
# Add Seurat metadata
so_merge@meta.data$celltype_full <- plyr::mapvalues(x = so_merge@meta.data$cluster_l2,
from = celltype_full_dict$cluster,
to = celltype_full_dict$celltype)
# correct rownames as barcodes
rownames(so_merge@meta.data) <- so_merge@meta.data$barcode
DimPlot(so_merge,
group.by = 'celltype_full',
label = TRUE,
label.size = 5)+
theme(legend.position = 'none')+
coord_equal()
Save seurat objects as .rds files
saveRDS(so_merge, file = 'analysis_files/s4_so_merge.rds')
sessionInfo()
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] org.Mm.eg.db_3.13.0 AnnotationDbi_1.56.2 IRanges_2.28.0
## [4] S4Vectors_0.32.3 Biobase_2.54.0 BiocGenerics_0.40.0
## [7] clusterProfiler_4.0.5 SeuratObject_4.0.4 Seurat_4.1.0
## [10] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8
## [13] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0
## [16] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
## [19] Matrix_1.3-4
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.24 tidyselect_1.1.2
## [4] RSQLite_2.2.9 htmlwidgets_1.5.4 grid_4.1.1
## [7] BiocParallel_1.28.3 Rtsne_0.15 scatterpie_0.1.7
## [10] munsell_0.5.0 codetools_0.2-18 ica_1.0-2
## [13] future_1.24.0 miniUI_0.1.1.1 withr_2.5.0
## [16] spatstat.random_2.1-0 colorspace_2.0-3 GOSemSim_2.18.1
## [19] highr_0.9 knitr_1.38 rstudioapi_0.13
## [22] ROCR_1.0-11 tensor_1.5 DOSE_3.18.3
## [25] listenv_0.8.0 labeling_0.4.2 GenomeInfoDbData_1.2.7
## [28] polyclip_1.10-0 pheatmap_1.0.12 farver_2.1.0
## [31] bit64_4.0.5 downloader_0.4 treeio_1.16.2
## [34] parallelly_1.30.0 vctrs_0.3.8 generics_0.1.2
## [37] xfun_0.30 R6_2.5.1 GenomeInfoDb_1.30.1
## [40] graphlayouts_0.7.1 rmdformats_1.0.3 gridGraphics_0.5-1
## [43] bitops_1.0-7 spatstat.utils_2.3-0 cachem_1.0.6
## [46] fgsea_1.18.0 assertthat_0.2.1 vroom_1.5.7
## [49] promises_1.2.0.1 scales_1.1.1 ggraph_2.0.5
## [52] enrichplot_1.12.3 gtable_0.3.0 globals_0.14.0
## [55] goftest_1.2-3 tidygraph_1.2.0 rlang_1.0.1
## [58] splines_4.1.1 lazyeval_0.2.2 spatstat.geom_2.3-2
## [61] broom_0.7.12 yaml_2.3.5 reshape2_1.4.4
## [64] abind_1.4-5 modelr_0.1.8 backports_1.3.0
## [67] httpuv_1.6.5 qvalue_2.24.0 tools_4.1.1
## [70] bookdown_0.24 ggplotify_0.1.0 ellipsis_0.3.2
## [73] spatstat.core_2.4-0 jquerylib_0.1.4 RColorBrewer_1.1-2
## [76] ggridges_0.5.3 Rcpp_1.0.7 plyr_1.8.6
## [79] zlibbioc_1.40.0 RCurl_1.98-1.5 rpart_4.1-15
## [82] deldir_1.0-6 viridis_0.6.2 pbapply_1.5-0
## [85] cowplot_1.1.1 zoo_1.8-9 haven_2.4.3
## [88] ggrepel_0.9.1 cluster_2.1.2 fs_1.5.2
## [91] magrittr_2.0.1 RSpectra_0.16-0 data.table_1.14.2
## [94] scattermore_0.8 DO.db_2.9 lmtest_0.9-39
## [97] reprex_2.0.1 RANN_2.6.1 ggnewscale_0.4.6
## [100] fitdistrplus_1.1-6 matrixStats_0.61.0 hms_1.1.1
## [103] patchwork_1.1.1 mime_0.12 evaluate_0.15
## [106] xtable_1.8-4 readxl_1.3.1 gridExtra_2.3
## [109] compiler_4.1.1 shadowtext_0.1.1 KernSmooth_2.23-20
## [112] crayon_1.5.0 htmltools_0.5.2 ggfun_0.0.5
## [115] mgcv_1.8-36 later_1.3.0 tzdb_0.2.0
## [118] aplot_0.1.2 lubridate_1.8.0 DBI_1.1.2
## [121] tweenr_1.0.2 dbplyr_2.1.1 MASS_7.3-54
## [124] cli_3.1.1 parallel_4.1.1 igraph_1.2.7
## [127] pkgconfig_2.0.3 plotly_4.10.0 spatstat.sparse_2.1-0
## [130] xml2_1.3.3 ggtree_3.0.4 bslib_0.3.1
## [133] XVector_0.34.0 rvest_1.0.2 yulab.utils_0.0.4
## [136] digest_0.6.27 sctransform_0.3.3 RcppAnnoy_0.0.19
## [139] spatstat.data_2.1-2 Biostrings_2.62.0 rmarkdown_2.13
## [142] cellranger_1.1.0 leiden_0.3.9 fastmatch_1.1-3
## [145] tidytree_0.3.8 uwot_0.1.11 shiny_1.7.1
## [148] lifecycle_1.0.1 nlme_3.1-152 jsonlite_1.7.2
## [151] limma_3.48.3 viridisLite_0.4.0 fansi_1.0.2
## [154] pillar_1.7.0 lattice_0.20-44 KEGGREST_1.34.0
## [157] fastmap_1.1.0 httr_1.4.2 survival_3.2-11
## [160] GO.db_3.13.0 glue_1.6.1 png_0.1-7
## [163] bit_4.0.4 ggforce_0.3.3 stringi_1.7.6
## [166] sass_0.4.0 blob_1.2.2 memoise_2.0.1
## [169] ape_5.5 irlba_2.3.5 future.apply_1.8.1